# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html

import redis


class BookUrlPipeline(object):
    """Scrapy item pipeline that records book URLs in Redis.

    Items carrying a truthy ``book_id`` are deduplicated through the
    ``book:id`` set: the URL is queued on the ``book:book_url`` list only
    the first time that id is seen.  Items with no ``book_id`` have their
    URL pushed onto the ``book:no_url`` list instead.
    """

    def __init__(self, host='localhost', port=6379, db=0):
        # decode_responses=True makes redis-py return str instead of bytes;
        # every other connection option keeps its redis-py default.
        self.read = redis.Redis(host=host, port=port, db=db, decode_responses=True)

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from the crawler's Redis settings.

        Reads REDIS_HOST / REDIS_PORT / REDIS_DB, falling back to the
        constructor defaults when a setting is absent (previously a missing
        setting passed None straight through to redis.Redis).
        """
        settings = crawler.settings
        return cls(
            host=settings.get('REDIS_HOST', 'localhost'),
            port=settings.get('REDIS_PORT', 6379),
            db=settings.get('REDIS_DB', 0),
        )

    def process_item(self, item, spider):
        """Route the item's URL into Redis and pass the item downstream.

        Returns the item, as the Scrapy pipeline contract requires — the
        previous version returned None, which silently dropped the item
        for every later pipeline in ITEM_PIPELINES.
        """
        book_id = item['book_id']
        if book_id:
            # sadd returns 1 only when the member was not already in the
            # set, so each book_id's URL is queued at most once.
            if self.read.sadd('book:id', book_id):
                self.read.lpush('book:book_url', item['book_url'])
        else:
            # No id to deduplicate on — record the URL separately.
            self.read.lpush('book:no_url', item['book_url'])
        return item
